# -*- coding: utf-8 -*-
# @Time        :2025/3/6 10:58
# @Author      :文刀水寿
# @File        : 19_爬虫_解析_获取百度的百度一下.py
"""
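 @Description : Fetch the Baidu home page and print the `value` attribute of
                the <input id="su"> element, located with an XPath query.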
"""
# 1. Fetch the page source
# 2. Locate the target with XPath
# 3. Print the result

from lxml import etree
import urllib.request

# Page that is downloaded and searched below.
url = 'https://www.baidu.com/'

# Request headers sent with the download.
# NOTE(review): presumably these mimic a desktop browser so the full page is
# served -- confirm. The cookie is a hard-coded session value; it will go stale
# and should probably not live in source control.
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36 Edg/133.0.0.0',
    "cookie": "ZFY=:AyCv634CPxvhW:AuiBN:BB:BR4pzoPK8wtj8Hzkff:AidqM:C; BIDUPSID=1D18A232DA1CA3BD6E9DF4FA5B9F0F80; PSTM=1741164133; BAIDUID=1D18A232DA1CA3BDAFE6E6FC89F0EDB3:FG=1; BD_UPN=12314753; BA_HECTOR=0k21a4a0002kak8k2g0l018h9i8deo1jsg3j51v; BAIDUID_BFESS=1D18A232DA1CA3BDAFE6E6FC89F0EDB3:FG=1; BDORZ=B490B5EBF6F3CD402E515D22BCDA1598; H_PS_PSSID=61027_62128_62167_62232_62283_62325_62340_62346_62330_62368_62371_62391_62421_62423; BD_HOME=1"
}

request = urllib.request.Request(url=url, headers=headers)

# The context manager closes the connection even if reading or decoding
# fails; the timeout keeps the script from hanging on an unresponsive server.
with urllib.request.urlopen(request, timeout=10) as response:
    content = response.read().decode('utf8')

# `etree` is already imported at the top of the file, so the duplicate
# mid-file import has been dropped.
tree = etree.HTML(content)

# xpath() returns a list; it is empty when the served page does not contain
# the expected element, so fail with an explanatory message instead of a
# bare "list index out of range".
matches = tree.xpath('//span/input[@id="su"]/@value')
if not matches:
    raise IndexError(
        'XPath //span/input[@id="su"]/@value matched nothing; '
        'the page layout may have changed or the request was blocked'
    )
result = matches[0]

print(result)
